#load libraries
library(vcfR)
## Warning: package 'vcfR' was built under R version 3.5.2
##
## ***** *** vcfR *** *****
## This is vcfR 1.9.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(RADstackshelpR)
library(introgress)
## Loading required package: nnet
## Loading required package: genetics
## Warning: package 'genetics' was built under R version 3.5.2
## Loading required package: combinat
##
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
##
## combn
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
## Loading required package: gtools
## Loading required package: MASS
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 3.5.2
##
## NOTE: THIS PACKAGE IS NOW OBSOLETE.
##
## The R-Genetics project has developed an set of enhanced genetics
## packages to replace 'genetics'. Please visit the project homepage
## at http://rgenetics.org for informtion.
##
##
## Attaching package: 'genetics'
## The following objects are masked from 'package:base':
##
## %in%, as.factor, order
## Loading required package: RColorBrewer
#load data
vcf<-read.vcfR("/Users/devder/Desktop/parrots/la.parrots/parrotsnpsunfiltered.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 2
## header_line: 3
## variant count: 10870
## column count: 39
##
Meta line 2 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 10870
## Character matrix gt cols: 39
## skip: 0
## nrows: 10870
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant: 10870
## All variants processed
#read in locality info for samples
locs<-read.table("~/Desktop/parrots/la.parrots/parrotsamples.txt", header=T, sep="\t")
colnames(locs)<-c("id","location","tiss")
head(vcf@fix)
## CHROM POS ID REF ALT QUAL FILTER INFO
## [1,] "uce-1572" "208" "SNP_1" "A" "G" "-1" NA NA
## [2,] "uce-1572" "444" "SNP_2" "G" "A" "-1" NA NA
## [3,] "uce-4474" "89" "SNP_3" "G" "A" "-1" NA NA
## [4,] "uce-735" "67" "SNP_4" "C" "A" "-1" NA NA
## [5,] "uce-1371" "378" "SNP_5" "A" "G" "-1" NA NA
## [6,] "uce-5661" "481" "SNP_6" "G" "T" "-1" NA NA
vcf@gt[1:5,1:5]
## FORMAT AmafinB646 AmafinB714 AmafinMLZ12743 AmafinMLZ25447
## [1,] "GT" "0/1" "0/0" NA NA
## [2,] "GT" "0/0" "0/0" "0/0" "0/0"
## [3,] "GT" "0/1" "0/0" "0/0" "0/0"
## [4,] "GT" "0/1" "0/0" "0/0" "0/0"
## [5,] "GT" "0/0" "0/0" NA NA
#No info on depth of coverage or genotype quality in this vcf. What were our filtering parameters in gatk?
#check that sampling IDs in text file match sample IDs in the vcf
colnames(vcf@gt)[-1] == locs$id
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#check out missing data
missing.by.snp(vcf)
## Picking joint bandwidth of 0.11
## filt missingness snps.retained
## 1 0.30 0.24962043 10714
## 2 0.50 0.24733446 10642
## 3 0.60 0.22119187 9425
## 4 0.65 0.18170055 7574
## 5 0.70 0.15950759 6607
## 6 0.75 0.10862955 4670
## 7 0.80 0.08567444 3944
## 8 0.85 0.05239711 2948
## 9 0.90 0.03645960 2463
## 10 0.95 0.01090186 1593
## 11 1.00 0.00000000 1072
#probably worth doing some cursory filtering for missing data
#calculate missingness per sample
mat<-extract.gt(vcf)
miss<-c()
for (i in 1:ncol(mat)){
miss[i]<-sum(is.na(mat[,i]))
}
locs$miss<-miss
locs
## id location tiss miss
## 1 AmafinB646 CA frozen 140
## 2 AmafinB714 CA frozen 126
## 3 AmafinMLZ12743 Mexico toe pad 7229
## 4 AmafinMLZ25447 Mexico toe pad 6278
## 5 AmafinMLZ46005 Mexico toe pad 5542
## 6 AmafinMLZ54378 Mexico toe pad 5462
## 7 AmavirB564 CA frozen 156
## 8 AmavirB645 CA frozen 173
## 9 AmavirB715 CA frozen 164
## 10 AmavirB716 CA frozen 4292
## 11 AmavirB717 CA frozen 170
## 12 AmavirB718 CA frozen 266
## 13 AmavirB720 CA frozen 4173
## 14 AmavirB721 CA frozen 176
## 15 AmavirB724 CA frozen 167
## 16 AmavirB725 CA frozen 180
## 17 AmavirB726 CA frozen 4281
## 18 AmavirB727 CA frozen 162
## 19 AmavirB728 CA frozen 183
## 20 AmavirB729 CA frozen 169
## 21 AmavirB730 CA frozen 4349
## 22 AmavirB731 CA frozen 460
## 23 AmavirB734 CA frozen 166
## 24 AmavirB755 CA frozen 4659
## 25 AmavirLA7603 CA frozen 4593
## 26 AmavirLA8540 CA frozen 143
## 27 AmavirMLZ32264 Mexico toe pad 8309
## 28 AmavirMLZ39529 Mexico toe pad 8099
## 29 AmavirMLZ39531 Mexico toe pad 7867
## 30 AmavirMLZ48335 Mexico toe pad 6324
elevated number of missing genotypes in toepad samples, as expected
#remove loci that aren't biallelic
mat<-mat[nchar(vcf@fix[,5]) == 1,]
dim(mat)
## [1] 10841 30
#identify SNPs that are fixed away from LA
#conv.mat[1:5,1:5]
conv.mat<-mat
conv.mat[conv.mat == "0/0"]<-0
conv.mat[conv.mat == "0/1"]<-1
conv.mat[conv.mat == "1/1"]<-2
conv.mat<-as.data.frame(conv.mat)
#convert to numeric
for (i in 1:ncol(conv.mat)){
conv.mat[,i]<-as.numeric(as.character(conv.mat[,i]))
}
#show colnames to verify you're subsetting correctly
#colnames(conv.mat)
#calc AF
lilac.af<-(rowSums(conv.mat[,c(3:6)], na.rm=T)/(rowSums(is.na(conv.mat[,c(3:6)]) == FALSE)))/2
red.af<-(rowSums(conv.mat[,c(27:30)], na.rm=T)/(rowSums(is.na(conv.mat[,c(27:30)]) == FALSE)))/2
#find fixed SNPs
diff<-abs(lilac.af - red.af)
#how many SNPs are missing all 4 genotypes in one of the two populations
table(is.na(diff))
##
## FALSE TRUE
## 4618 6223
#how many SNPs are fixed
table(is.na(diff) == FALSE & diff == 1)
##
## FALSE TRUE
## 10422 419
#list some fixed SNPs
head(vcf@fix[,1][is.na(diff) == FALSE & diff == 1])
## [1] "uce-5661" "uce-7816" "uce-7816" "uce-6933" "uce-2714" "uce-4401"
#subsample original matrix to only fixed diff SNPs
gen.mat<-mat[is.na(diff) == FALSE & diff == 1,]
dim(gen.mat)
## [1] 419 30
#subsample matrix converted for AF calcs to only fixed SNPS
conv.mat<-conv.mat[is.na(diff) == FALSE & diff == 1,]
dim(conv.mat)
## [1] 419 30
#write a logical test to convert alleles so that a single number represents one parental ancestry
for (i in 1:nrow(gen.mat)){
#if 1 is the red crowned allele (e.g. absent in lilacs 3-6)
if((sum(conv.mat[i,c(3:6)], na.rm=T)/(sum(is.na(conv.mat[i,c(3:6)]) == FALSE)))/2 == 0){
#swap all '0/0' cells with '2/2'
gen.mat[i,][gen.mat[i,] == "0/0"]<-"2/2"
#swap all '1/1' cells with '0/0'
gen.mat[i,][gen.mat[i,] == "1/1"]<-"0/0"
#finally convert all '2/2' cells (originally 0/0) into '1/1'
gen.mat[i,][gen.mat[i,] == "2/2"]<-"1/1"
#no need to touch hets
}
}
#convert R class NAs to the string "NA/NA"
gen.mat[is.na(gen.mat) == TRUE]<-"NA/NA"
#if it worked correctly this should be only missing or '1/1'
table(gen.mat[,3])
##
## 1/1 NA/NA
## 207 212
table(gen.mat[,4])
##
## 1/1 NA/NA
## 292 127
table(gen.mat[,5])
##
## 1/1 NA/NA
## 344 75
table(gen.mat[,6])
##
## 1/1 NA/NA
## 325 94
#prepare data structures for introgress input
#make locus info df
locus.info<-data.frame(locus=vcf@fix[vcf@fix[,3] %in% rownames(gen.mat),1],
type=rep("C", times=nrow(gen.mat)),
lg=gsub("uce-","",vcf@fix[vcf@fix[,3] %in% rownames(gen.mat),1]),
marker.pos=gsub("SNP_","",rownames(gen.mat)))
#make linkage group numeric
locus.info$lg<-as.numeric(as.character(locus.info$lg))
locus.info$marker.pos<-as.numeric(as.character(locus.info$marker.pos))
#we now have a gt matrix in proper format for introgress
#convert genotype data into a matrix of allele counts
count.matrix<-prepare.data(admix.gen=gen.mat, loci.data=locus.info,
parental1="1",parental2="0", pop.id=F,
ind.id=F, fixed=T)
## prepare.data is working; this may take a moment
## Processing data for 30 individuals and 419 loci.
#estimate hybrid index values
hi.index.sim<-est.h(introgress.data=count.matrix,loci.data=locus.info,
fixed=T, p1.allele="1", p2.allele="0")
## est.h is working; this may take a few minutes
#plot introgress plots with all 419 fixed different SNPs
locus.info$locus<-rep("", times=nrow(locus.info))
#LociDataSim1$lg<-c(1:110)
mk.image(introgress.data=count.matrix, loci.data=locus.info,
marker.order=NULL,hi.index=hi.index.sim, ylab.image="Individuals",
xlab.h="Red-crowned ancestry", pdf=F,
col.image=c("red","green","blue"))
all 419 fixed SNPs regardless of missing data
dev.off() #reset plot window
## null device
## 1
#calculate mean heterozygosity
het<-calc.intersp.het(introgress.data=count.matrix)
#dev.off()
#make triangle plot
plot(x=hi.index.sim$h, y=het, bg=rgb(0,0,0,alpha=0.3), pch=21, cex=2, col="black",
xlab="Hybrid Index", ylab="Interspecific heterozygosity",
ylim=c(0,1))
segments(x0 =0, y0 =0, x1 =.5, y1 =1)
segments(x0 =1, y0 =0, x1 =.5, y1 =1)
filter the fixed differences to investigate role of missing genotypes in toepad samples
#calculate number of missing genotypes in our parental pops for each of the 419 fixed sites
lilac.miss<-c()
red.miss<-c()
for (i in 1:nrow(gen.mat)){
lilac.miss[i]<-sum(gen.mat[i,3:6] == "NA/NA")
red.miss[i]<-sum(gen.mat[i,27:30] == "NA/NA")
}
#filter gen.mat to have a minimum of 2 genotypes in both species to call a site fixed
gen.mat.filtered<-gen.mat[lilac.miss <= 2 & red.miss <= 2,]
dim(gen.mat.filtered) #214 SNPs left
## [1] 214 30
locus.info.filtered<-locus.info[lilac.miss <= 2 & red.miss <= 2,]
#revisualize to see if this affects downstream inference
#convert genotype data into a matrix of allele counts
count.matrix<-prepare.data(admix.gen=gen.mat.filtered, loci.data=locus.info.filtered,
parental1="1",parental2="0", pop.id=F,
ind.id=F, fixed=T)
## prepare.data is working; this may take a moment
## Processing data for 30 individuals and 214 loci.
#estimate hybrid index values
hi.index.sim<-est.h(introgress.data=count.matrix,loci.data=locus.info.filtered,
fixed=T, p1.allele="1", p2.allele="0")
## est.h is working; this may take a few minutes
#plot
mk.image(introgress.data=count.matrix, loci.data=locus.info.filtered,
marker.order=NULL,hi.index=hi.index.sim, ylab.image="Individuals",
xlab.h="Red-crowned ancestry", pdf=F,
col.image=c("red","green","blue"))
Thisplot shows 214 fixed SNPs with 50% missing data cutoff in both parental pops. This looks much better. Individuals 1-4 are Lilacs from Mexico, 5&6 are lilacs from LA, 7-9 are apparent hybrids, 10-26 are red crowns from LA, 27:30 are red crowns from Mexico. White bars are UCE loci. UCE 7322 is fascinating. Every single bird we sequenced from LA (except a single het) is homozygous lilac for the fixed for SNP 489. Strong evidence for adaptive introgression of this locus. Definitely worth mining out the full sequence of this UCE and mapping it to zebra finch genome to investigate putative function
dev.off() #reset plot window
## null device
## 1
#calculate mean heterozygosity
het<-calc.intersp.het(introgress.data=count.matrix)
#dev.off()
#make triangle plot
plot(x=hi.index.sim$h, y=het, bg=rgb(0,0,0,alpha=0.3), pch=21, cex=2, col="black",
xlab="Hybrid Index", ylab="Interspecific heterozygosity",
ylim=c(0,1))
segments(x0 =0, y0 =0, x1 =.5, y1 =1)
segments(x0 =1, y0 =0, x1 =.5, y1 =1)
##filter gen.mat to allow no missing data in either species to call a site fixed
##assess downstream effects
#gen.mat.filtered<-gen.mat[lilac.miss == 0 & red.miss == 0,]
#dim(gen.mat.filtered) #214 SNPs left
#locus.info.filtered<-locus.info[lilac.miss == 0 & red.miss == 0,]
#
##revisualize to see if this affects downstream inference
##convert genotype data into a matrix of allele counts
#count.matrix<-prepare.data(admix.gen=gen.mat.filtered, loci.data=locus.info.filtered,
# parental1="1",parental2="0", pop.id=F,
# ind.id=F, fixed=T)
#
##estimate hybrid index values
#hi.index.sim<-est.h(introgress.data=count.matrix,loci.data=locus.info.filtered,
# fixed=T, p1.allele="1", p2.allele="0")
#
##plot
#mk.image(introgress.data=count.matrix, loci.data=locus.info.filtered,
# marker.order=NULL,hi.index=hi.index.sim, ylab.image="Individuals",
# xlab.h="Red-crowned ancestry", pdf=F,
# col.image=c("red","green","blue"))
This looks cleaner, but I actually think we lose a lot of the signal showing just how messy this is. This reduced dataset makes it seem like almost all of the introgressed alleles are exlusively found as het sites, which didn't really seem to be the case when we allowed more missing data above.
dev.off() #reset plot window
## null device
## 1
##calculate mean heterozygosity
#het<-calc.intersp.het(introgress.data=count.matrix)
##make triangle plot
#plot(x=hi.index.sim$h, y=het, bg=rgb(0,0,0,alpha=0.3), pch=21, cex=2, col="black",
# xlab="Hybrid Index", ylab="Interspecific heterozygosity",
# ylim=c(0,1))
#segments(x0 =0, y0 =0, x1 =.5, y1 =1)
#segments(x0 =1, y0 =0, x1 =.5, y1 =1)
#
##removing the missing data certainly seems to make our triangle plot cleaner though,
#so we just have to decide which trade off we want
#do genomic cline analysis
#give each SNP a unique identifier
locus.info.filtered$uce<-locus.info.filtered$locus
locus.info.filtered$locus<-locus.info.filtered$marker.pos
#test for cline neutrality at each SNP with 1000 replicates
cline.object<-genomic.clines(introgress.data=count.matrix, hi.index=hi.index.sim, loci.data=locus.info.filtered,
sig.test = T, method = "parametric", n.reps = 1000, classification = T)
## genomic.clines is working; be patient, as this may take a while
## begining analysis.
## estimating genomic cline for: 30
## estimating genomic cline for: 31
## estimating genomic cline for: 37
## estimating genomic cline for: 68
## estimating genomic cline for: 123
## estimating genomic cline for: 226
## estimating genomic cline for: 245
## estimating genomic cline for: 262
## estimating genomic cline for: 390
## estimating genomic cline for: 406
## estimating genomic cline for: 489
## estimating genomic cline for: 496
## estimating genomic cline for: 600
## estimating genomic cline for: 627
## estimating genomic cline for: 644
## estimating genomic cline for: 708
## estimating genomic cline for: 748
## estimating genomic cline for: 766
## estimating genomic cline for: 785
## estimating genomic cline for: 795
## estimating genomic cline for: 821
## estimating genomic cline for: 844
## estimating genomic cline for: 859
## estimating genomic cline for: 899
## estimating genomic cline for: 946
## estimating genomic cline for: 990
## estimating genomic cline for: 1121
## estimating genomic cline for: 1353
## estimating genomic cline for: 1354
## estimating genomic cline for: 1444
## estimating genomic cline for: 1486
## estimating genomic cline for: 1508
## estimating genomic cline for: 1560
## estimating genomic cline for: 1582
## estimating genomic cline for: 1636
## estimating genomic cline for: 1701
## estimating genomic cline for: 1706
## estimating genomic cline for: 1855
## estimating genomic cline for: 1856
## estimating genomic cline for: 1952
## estimating genomic cline for: 2038
## estimating genomic cline for: 2044
## estimating genomic cline for: 2244
## estimating genomic cline for: 2284
## estimating genomic cline for: 2372
## estimating genomic cline for: 2388
## estimating genomic cline for: 2492
## estimating genomic cline for: 2538
## estimating genomic cline for: 2549
## estimating genomic cline for: 2625
## estimating genomic cline for: 2632
## estimating genomic cline for: 2644
## estimating genomic cline for: 2750
## estimating genomic cline for: 2820
## estimating genomic cline for: 2893
## estimating genomic cline for: 3018
## estimating genomic cline for: 3052
## estimating genomic cline for: 3081
## estimating genomic cline for: 3109
## estimating genomic cline for: 3165
## estimating genomic cline for: 3175
## estimating genomic cline for: 3185
## estimating genomic cline for: 3209
## estimating genomic cline for: 3214
## estimating genomic cline for: 3279
## estimating genomic cline for: 3299
## estimating genomic cline for: 3358
## estimating genomic cline for: 3432
## estimating genomic cline for: 3471
## estimating genomic cline for: 3479
## estimating genomic cline for: 3531
## estimating genomic cline for: 3552
## estimating genomic cline for: 3589
## estimating genomic cline for: 3694
## estimating genomic cline for: 3700
## estimating genomic cline for: 3768
## estimating genomic cline for: 3826
## estimating genomic cline for: 3895
## estimating genomic cline for: 3959
## estimating genomic cline for: 3972
## estimating genomic cline for: 4047
## estimating genomic cline for: 4080
## estimating genomic cline for: 4119
## estimating genomic cline for: 4267
## estimating genomic cline for: 4270
## estimating genomic cline for: 4406
## estimating genomic cline for: 4449
## estimating genomic cline for: 4450
## estimating genomic cline for: 4462
## estimating genomic cline for: 4488
## estimating genomic cline for: 4518
## estimating genomic cline for: 4595
## estimating genomic cline for: 4614
## estimating genomic cline for: 4644
## estimating genomic cline for: 4674
## estimating genomic cline for: 4761
## estimating genomic cline for: 4804
## estimating genomic cline for: 4869
## estimating genomic cline for: 4891
## estimating genomic cline for: 4899
## estimating genomic cline for: 4918
## estimating genomic cline for: 4936
## estimating genomic cline for: 5006
## estimating genomic cline for: 5015
## estimating genomic cline for: 5021
## estimating genomic cline for: 5022
## estimating genomic cline for: 5099
## estimating genomic cline for: 5111
## estimating genomic cline for: 5178
## estimating genomic cline for: 5232
## estimating genomic cline for: 5294
## estimating genomic cline for: 5352
## estimating genomic cline for: 5621
## estimating genomic cline for: 5632
## estimating genomic cline for: 5705
## estimating genomic cline for: 5750
## estimating genomic cline for: 5789
## estimating genomic cline for: 5832
## estimating genomic cline for: 5991
## estimating genomic cline for: 5998
## estimating genomic cline for: 6030
## estimating genomic cline for: 6044
## estimating genomic cline for: 6087
## estimating genomic cline for: 6129
## estimating genomic cline for: 6134
## estimating genomic cline for: 6151
## estimating genomic cline for: 6284
## estimating genomic cline for: 6326
## estimating genomic cline for: 6438
## estimating genomic cline for: 6542
## estimating genomic cline for: 6592
## estimating genomic cline for: 6679
## estimating genomic cline for: 6704
## estimating genomic cline for: 6717
## estimating genomic cline for: 6926
## estimating genomic cline for: 6936
## estimating genomic cline for: 6986
## estimating genomic cline for: 7047
## estimating genomic cline for: 7078
## estimating genomic cline for: 7122
## estimating genomic cline for: 7207
## estimating genomic cline for: 7208
## estimating genomic cline for: 7315
## estimating genomic cline for: 7327
## estimating genomic cline for: 7525
## estimating genomic cline for: 7658
## estimating genomic cline for: 7678
## estimating genomic cline for: 7698
## estimating genomic cline for: 7699
## estimating genomic cline for: 7765
## estimating genomic cline for: 7824
## estimating genomic cline for: 7825
## estimating genomic cline for: 7835
## estimating genomic cline for: 7855
## estimating genomic cline for: 7902
## estimating genomic cline for: 8022
## estimating genomic cline for: 8044
## estimating genomic cline for: 8167
## estimating genomic cline for: 8190
## estimating genomic cline for: 8197
## estimating genomic cline for: 8366
## estimating genomic cline for: 8384
## estimating genomic cline for: 8605
## estimating genomic cline for: 8609
## estimating genomic cline for: 8610
## estimating genomic cline for: 8630
## estimating genomic cline for: 8685
## estimating genomic cline for: 8690
## estimating genomic cline for: 8727
## estimating genomic cline for: 8809
## estimating genomic cline for: 8840
## estimating genomic cline for: 8901
## estimating genomic cline for: 8982
## estimating genomic cline for: 9008
## estimating genomic cline for: 9035
## estimating genomic cline for: 9075
## estimating genomic cline for: 9157
## estimating genomic cline for: 9257
## estimating genomic cline for: 9335
## estimating genomic cline for: 9344
## estimating genomic cline for: 9374
## estimating genomic cline for: 9525
## estimating genomic cline for: 9538
## estimating genomic cline for: 9547
## estimating genomic cline for: 9549
## estimating genomic cline for: 9565
## estimating genomic cline for: 9789
## estimating genomic cline for: 9871
## estimating genomic cline for: 9878
## estimating genomic cline for: 9958
## estimating genomic cline for: 9972
## estimating genomic cline for: 9994
## estimating genomic cline for: 10156
## estimating genomic cline for: 10218
## estimating genomic cline for: 10272
## estimating genomic cline for: 10289
## estimating genomic cline for: 10300
## estimating genomic cline for: 10329
## estimating genomic cline for: 10355
## estimating genomic cline for: 10367
## estimating genomic cline for: 10393
## estimating genomic cline for: 10394
## estimating genomic cline for: 10419
## estimating genomic cline for: 10432
## estimating genomic cline for: 10433
## estimating genomic cline for: 10482
## estimating genomic cline for: 10496
## estimating genomic cline for: 10567
## estimating genomic cline for: 10705
## estimating genomic cline for: 10716
## estimating genomic cline for: 10792
## estimating genomic cline for: 10856
## estimating genomic cline for: 10859
## estimating genomic cline for: 10860
## genomic clines analysis complete!
#we have 19 snps significantly non-neutral at a p value of 0
nrow(cline.object$Summary.data[cline.object$Summary.data[,4] == 0,])
## [1] 24
#because of our skewed sample size in LA between RC and LC, we have more statistical power to detect
#introgression from LC -> RC than vice versa. For this reason only look at significant LC->RC outliers.
cline.object$Summary.data[cline.object$Quantiles[,1] == 1 & cline.object$Summary.data[,4] == 0,]
## locus type lnL ratio P-value
## [1,] 30 1 32.118257 0
## [2,] 489 1 94.411768 0
## [3,] 2492 1 12.911472 0
## [4,] 3165 1 42.875520 0
## [5,] 3432 1 15.975614 0
## [6,] 4449 1 28.572559 0
## [7,] 4488 1 16.316756 0
## [8,] 4674 1 16.010809 0
## [9,] 5022 1 52.717615 0
## [10,] 6936 1 27.729711 0
## [11,] 8197 1 7.959912 0
#there are 10
#plot each cline
clines.plot(cline.data = cline.object, pdf = F, quantiles = T, cd=c("LC","het","RC"))
#reset plot window
dev.off()
## null device
## 1
Because of our skewed sample size in LA between RC and LC, we have more statistical power to detect introgression from LC -> RC than vice versa. For this reason we highlight significant LC->RC outliers.
#plot all the clines overlayed, and color the LC->RC significant outliers in red
cline.data<-cline.object
## homozygote
plot(0:1,0:1,type="n",xlab="Hybrid index",ylab="LC allele freq")
n.loci<-dim(cline.data$Loci.data)[1]
## Get needed values
hi<-cline.data$hybrid.index
ub<-cline.data$Neutral.AA[[1]][1,]
lb<-cline.data$Neutral.AA[[2]][1,]
## Plot neutral cline
polygon.data.matrix<-rbind(hi,lb,ub,lb,ub)[,order(hi)]
polygon.data.matrix.rev<-rbind(hi,lb,ub,lb,ub)[,order(-hi)]
polygon(c(polygon.data.matrix[1,],polygon.data.matrix.rev[1,]),
c(polygon.data.matrix[2,],polygon.data.matrix.rev[3,]), col="grey",border=NA)
## Plot observed Clines
for(i in 1:n.loci){
AA.line<-cline.data$Fitted.AA[i,]
line.matrix<-rbind(hi,AA.line)[,order(hi)]
lines(line.matrix[1,],line.matrix[2,],lty=1)
}
#plot significant outliers
for(i in c(1:n.loci)[cline.object$Quantiles[,1] == 1 & cline.object$Summary.data[,4] == 0]){
AA.line<-cline.data$Fitted.AA[i,]
line.matrix<-rbind(hi,AA.line)[,order(hi)]
lines(line.matrix[1,],line.matrix[2,],lty=1, col="red")
}
plot of all the clines overlayed, with significant LC->RC significant outliers in red